Interactive plots are dynamic data visualizations that respond to viewers’ actions (e.g., clicking, hovering).
Interactive plots can be used to…
Help audiences quickly digest information in complex data visualizations
Support data-driven decision-making with enhanced synthesis
Facilitate stakeholder communication and collaboration
Enhance strategic clarity, particularly with complex data (e.g., many groups)
R is a very useful tool for data visualization because it can be used to easily integrate interactive visualizations directly into deliverables, from reports to data dashboards. R is also accessible and open-source with many well-documented packages that can be used to create custom interactive visualizations.
Define question to be addressed and explore the data
Create static versions of each data visualization following data visualization principles
Plot types we’ll examine today include:
Line plots
Bar plots
Rank plots
Scatter plots
See all software references at the end of the tutorial.
# load packages and describe their uses
library(tidyverse) # data cleaning, organization, and visualization
library(plotly) #key interactive plot package
library(ggiraph) #key interactive plot package
library(MetBrewer) #plot customization functions
library(tidytext) #plot customization functions
library(ggbump) #plot customization functions
library(patchwork) #plot customization functions
#color palette
# 21 colours are requested from the "Signac" palette. The manual fill/colour
# scales in the plots below take their values from this vector, so it has to
# supply at least one colour per team shown -- TODO confirm 21 still covers
# every team if the data are updated.
MetBrew_Signac <- MetBrewer::met.brewer("Signac", n = 21)
## install packages by running install.packages("PACKAGE-NAME")
## cite packages by running citation(package = "PACKAGE-NAME")
## load packages by running library(PACKAGE-NAME)
## specify function from a specific package by running PACKAGE-NAME::function()
## troubleshoot functions from packages by running ?PACKAGE-NAME::function()
Question to be addressed: For this tutorial, we will be examining performance and attendance of women’s soccer/football teams, and how they might be related, in the top-tier league in the United Kingdom.
Data: We will be looking at English Women’s Football data from the EWF database (The English Women’s Football Database, 2025), which is available on GitHub at the following URL: https://github.com/probjects/ewf-database. You can download the data and access the data dictionary directly at the database link.
#import matches dataset, one row per game per season
#remove parts of team names since they're used inconsistently, e.g., Arsenal = Arsenal Ladies
# The three labels are stripped one after another, as literal (fixed) strings.
# NOTE(review): the cleanup runs over every column and gsub() returns
# character, so all columns come back as <chr> (which is why the plotting code
# wraps values in as.numeric()). Text outside the team names is altered as
# well (the division prints as "FA 's Su..."), and the space that preceded a
# removed label stays in the name. Consider limiting the cleanup to the
# team-name columns once nothing else is confirmed to rely on these strings.
df_matches <- read_csv("./data/ewf_matches.csv") %>%
  mutate(across(everything(), function(column) {
    for (label in c("Women", "Ladies", "Belles")) {
      column <- gsub(label, "", column, fixed = TRUE)
    }
    column
  }))
#examine data
head(df_matches)
## # A tibble: 6 × 22
## season_id season tier division match_id match_name date attendance
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 S-2011-2011-1-S 2011-2011 1 FA 's Su… M-2011-… "Chelsea … 2011… 2510
## 2 S-2011-2011-1-S 2011-2011 1 FA 's Su… M-2011-… "Lincoln … 2011… 742
## 3 S-2011-2011-1-S 2011-2011 1 FA 's Su… M-2011-… "Birmingh… 2011… 602
## 4 S-2011-2011-1-S 2011-2011 1 FA 's Su… M-2011-… "Liverpoo… 2011… 835
## 5 S-2011-2011-1-S 2011-2011 1 FA 's Su… M-2011-… "Everton … 2011… 220
## 6 S-2011-2011-1-S 2011-2011 1 FA 's Su… M-2011-… "Liverpoo… 2011… 341
## # ℹ 14 more variables: home_team_id <chr>, home_team_name <chr>,
## # away_team_id <chr>, away_team_name <chr>, score <chr>,
## # home_team_score <chr>, away_team_score <chr>, home_team_score_margin <chr>,
## # away_team_score_margin <chr>, home_team_win <chr>, away_team_win <chr>,
## # draw <chr>, result <chr>, note <chr>
#import standings dataset, one row per team per season
# "Women", "Ladies" and "Belles" are stripped (as literal strings, one after
# another) because team names are recorded inconsistently across seasons.
# NOTE(review): the cleanup runs over every column and gsub() returns
# character, so all columns come back as <chr> (which is why the plotting code
# wraps values in as.numeric()). Text outside the team names is altered as
# well (the division prints as "FA 's S..."), and the space that preceded a
# removed label stays in the name. Consider limiting the cleanup to team_name
# once nothing else is confirmed to rely on these strings.
df_standings <- read_csv("./data/ewf_standings.csv") %>%
  mutate(across(everything(), function(column) {
    for (label in c("Women", "Ladies", "Belles")) {
      column <- gsub(label, "", column, fixed = TRUE)
    }
    column
  }))
#examine data
head(df_standings)
## # A tibble: 6 × 17
## season_id season tier division position team_id team_name played wins draws
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 S-2011-20… 2011-… 1 FA 's S… 1 T-001-T "Arsenal… 14 10 2
## 2 S-2011-20… 2011-… 1 FA 's S… 2 T-003-T "Birming… 14 8 5
## 3 S-2011-20… 2011-… 1 FA 's S… 3 T-013-T "Everton… 14 7 4
## 4 S-2011-20… 2011-… 1 FA 's S… 4 T-016-T "Lincoln… 14 6 3
## 5 S-2011-20… 2011-… 1 FA 's S… 5 T-006-T "Bristol… 14 4 4
## 6 S-2011-20… 2011-… 1 FA 's S… 6 T-008-T "Chelsea… 14 4 3
## # ℹ 7 more variables: losses <chr>, goals_for <chr>, goals_against <chr>,
## # goal_difference <chr>, points <chr>, point_adjustment <chr>,
## # season_outcome <chr>
# Static line plot: average home attendance per team per season (tier 1),
# followed by its automatic plotly conversion.
attendanceplot <- df_matches %>% #data organization
  filter(tier == 1, season != "2020-2021") %>% #select top tier and exclude 2020 season
  group_by(season, home_team_name) %>%
  summarise( #one row per season and team holding the average attendance
    mean_attendance = mean(as.numeric(attendance), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(
    x = as.factor(season),
    y = mean_attendance,
    group = as.factor(home_team_name),
    color = as.factor(home_team_name)
  )) + #set axes and groups
  geom_point(size = 2) + #add points to plot
  # geom_line() joins each team's points in season order; geom_path() would
  # join them in row order, which is only right when the rows are pre-sorted.
  geom_line() + #add lines to plot
  scale_color_manual(values = MetBrew_Signac, name = "Team") + #set color palette
  theme_classic() + #set theme
  labs(
    title = "Average game attendance per home team per season", #add axis labels, caption, and titles/subtitles
    subtitle = "English Football Women's Super League (tier 1)",
    x = "Season", y = "Average game attendance",
    caption = "Source: EWF Database (2025), https://github.com/probjects/ewf-database. Excludes 2020-2021."
  ) +
  theme(
    legend.position = "bottom", #put legend on the bottom
    legend.key.size = unit(0.05, "cm"), #decrease size between legend items
    legend.text = element_text(size = 10), #change legend font size
    axis.text.x = element_text(angle = 45, vjust = 0.75, hjust = 0.75) #rotate season labels so they don't overlap
  )
attendanceplot
plotly::ggplotly(attendanceplot)
#same data organization as static version
plotly_df <- df_matches %>%
  filter(tier == 1, season != "2020-2021") %>% #select top tier and exclude 2020 season
  group_by(season, home_team_name) %>%
  summarise( #one row per season and team holding the average attendance
    mean_attendance = mean(as.numeric(attendance), na.rm = TRUE),
    .groups = "drop"
  ) %>%
  select(home_team_name, mean_attendance, season) #keep the original column order

# Native plotly line plot of the same data, one trace per home team.
attendanceplotly <- plotly::plot_ly(
  data = plotly_df,
  x = ~as.factor(season), y = ~mean_attendance, #set axes in plotly
  split = ~home_team_name, color = ~home_team_name,
  type = "scatter", #set plot type
  mode = "lines+markers", #choose items to include in scatter plot
  # hover label: one item per line, average rounded to 2 decimals like the
  # ggiraph version of this plot
  text = ~paste0(
    "Home team: ", home_team_name,
    "<br>Average attendance: ", round(mean_attendance, 2)
  ),
  hoverinfo = "text",
  marker = list(size = 6), #set dot size
  line = list(width = 2) #set line size
) %>%
  layout( #set titles, legend, and axis labels
    # `subtitle` is not accepted as a top-level layout() argument (it is
    # dropped with a warning), so the subtitle is appended to the title text
    # as a smaller second line.
    title = list(text = paste0(
      "Average game attendance per season per home team (double click in legend)",
      "<br><sup>",
      "English Football Women's Super League (tier 1), Source: EWF Database (2025), https://github.com/probjects/ewf-database. Excludes 2020-2021.",
      "</sup>"
    )),
    margin = list(t = 80), #leave room for the two-line title
    hovermode = "closest",
    legend = list(itemclick = "toggleothers"),
    xaxis = list(title = "Season"),
    yaxis = list(title = "Average game attendance")
  )
attendanceplotly
#use most of same plot code as static version, but make point and line interactive with ggiraph
attendanceplot_giraph <- df_matches %>%
  filter(tier == 1, season != "2020-2021") %>% #select top tier and exclude 2020 season
  group_by(season, home_team_name) %>%
  summarise( #one row per season and team; average rounded for the tooltip
    mean_attendance = round(mean(as.numeric(attendance), na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  ggplot(aes(
    x = as.factor(season),
    y = mean_attendance,
    group = as.factor(home_team_name),
    color = as.factor(home_team_name), #set axes and color
    tooltip = paste("Team:", home_team_name, "<br>Average attendance:", mean_attendance), #text for interactive hover
    data_id = as.factor(home_team_name) #shared id, so hovering highlights a team's points and line together
  )) +
  geom_point_interactive(size = 2, hover_nearest = TRUE) + #add interactive points to plot
  geom_line_interactive() + #add interactive lines to plot
  scale_color_manual(values = MetBrew_Signac, name = "Team") + #set color palette
  theme_classic() + #set theme
  labs(
    title = "Average game attendance per home team per season", #add labels for title/subtitle, caption, and axes
    subtitle = "English Football Women's Super League (tier 1)",
    x = "Season", y = "Average game attendance",
    caption = "Source: EWF Database (2025), https://github.com/probjects/ewf-database. Excludes 2020-2021."
  ) +
  theme(
    legend.position = "bottom",
    legend.key.size = unit(0.05, "cm"),
    legend.text = element_text(size = 7), #adjust positioning for legend, font size
    axis.text.x = element_text(angle = 45, vjust = 0.75, hjust = 0.75) #rotate season labels so they don't overlap
  )
css_dot_hover <- girafe_css_bicolor(primary = "aquamarine", secondary = "blue") #set colors to be used for opts_hover
attendanceplot_giraphplot <- girafe(
  ggobj = attendanceplot_giraph,
  options = list(
    opts_hover = opts_hover(css = css_dot_hover), #customize fill and outline of hover
    opts_tooltip = opts_tooltip(css = "padding:3px;background-color:white;color:black;"), #customize size and colors of text label and background when hovering
    opts_sizing(rescale = TRUE, width = 1) #customize resizing
  )
)
attendanceplot_giraphplot
# Static bar plot of goal difference per team, one facet per season, followed
# by its plotly conversion.
goalsdiffplot <- df_standings %>%
  filter(tier == 1) %>% #subset only top tier
  select(team_name, goal_difference, goals_for, goals_against, season) %>% #select columns of interest
  ggplot(aes(
    # Order the bars by goal difference separately inside each season facet.
    # A plain reorder() ranks teams once across all seasons, and arranging
    # the rows beforehand has no effect on bar order.
    x = tidytext::reorder_within(team_name, as.numeric(goal_difference), season),
    y = as.numeric(goal_difference),
    fill = as.factor(team_name), #set x and y axes, and group by team for plot
    goal_difference_text = goal_difference, team_text = team_name #text for ggplotly labels
  )) +
  scale_fill_manual(values = c(MetBrew_Signac), name = "Team name") + #set fill palette
  tidytext::scale_x_reordered() + #show plain team names on the reordered axis
  geom_col() + #add bar for barplot
  theme_classic() + #set theme
  coord_flip() + #flip X and Y axis
  facet_wrap(~season, scales = "free") + #create facets by season (free scales are needed for per-facet ordering)
  geom_hline(yintercept = 0, linewidth = 0.5, color = "gray") + #add line at 0 (no goal difference)
  labs(
    title = "Total goal difference per team per season", #add axis labels, caption, title/subtitle
    subtitle = "English Football Women's Super League (tier 1)",
    caption = "Source: EWF Database (2025), https://github.com/probjects/ewf-database",
    y = "Goal difference (for-against)", x = "Team"
  ) +
  theme(legend.position = "none")
goalsdiffplot
ggplotly(goalsdiffplot,
  width = 1000, #adjust width
  height = 800, #adjust height
  tooltip = c("team_text", "goal_difference_text") #add hover labels
)
#use most of same plot code as static version, but make bar interactive with ggiraph
goalsdiff_giraph <- df_standings %>%
  filter(tier == 1) %>% #subset only top tier
  select(team_name, goal_difference, goals_for, goals_against, season) %>% #select only columns of interest
  ggplot(aes(
    # Order the bars by goal difference separately inside each season facet.
    # A plain reorder() ranks teams once across all seasons, and arranging
    # the rows beforehand has no effect on bar order.
    x = tidytext::reorder_within(team_name, as.numeric(goal_difference), season),
    y = as.numeric(goal_difference),
    fill = as.factor(team_name),
    tooltip = paste("Team:", team_name, "<br>Total goal difference:", goal_difference), #text for interactive hover
    data_id = as.factor(team_name) #same id in every facet for a given team
  )) +
  scale_fill_manual(values = c(MetBrew_Signac), name = "Team name") + #set fill palette
  tidytext::scale_x_reordered() + #show plain team names on the reordered axis
  geom_col_interactive() + #add interactive bar
  theme_classic() + #set theme
  coord_flip() + #flip x and y axis
  facet_wrap(~season, scales = "free") + #facet by season (free scales are needed for per-facet ordering)
  geom_hline(yintercept = 0, linewidth = 0.5, color = "gray") + #add line at 0 (no goal difference)
  labs(
    title = "Total goal difference (for-against) per team per season", #add title, subtitle, caption, x and y axis labels
    subtitle = "English Football Women's Super League (tier 1)",
    caption = "Source: EWF Database (2025), https://github.com/probjects/ewf-database",
    y = "Goal difference (for-against)", x = "Team"
  ) +
  theme(legend.position = "none") #remove legend
goalsdiffggiraph_plot <- girafe(
  ggobj = goalsdiff_giraph,
  width_svg = 12, #adjust width
  height_svg = 8, #adjust height
  options = list(
    opts_hover(css = "fill:aquamarine;stroke:black;cursor:pointer;") #customize fill and outline of hover
  )
)
goalsdiffggiraph_plot
# Static stacked bar plot: season points per team (tier 1), one bar per
# season with a segment per team, followed by its plotly conversion.
pointsplot <- df_standings %>%
  filter(tier == 1) %>% #keep top-tier rows only
  select(team_name, points, season) %>% #keep only the columns the plot uses
  ggplot(aes(
    x = as.factor(season),
    y = as.numeric(points),
    fill = as.factor(team_name), #one stacked segment per team
    points_text = points, team_text = team_name #extra aesthetics read by the ggplotly tooltip
  )) +
  geom_col(position = "stack") + #bar heights come straight from the data
  scale_fill_manual(name = "Team name", values = c(MetBrew_Signac)) + #fill palette
  scale_color_manual(name = "Team name", values = c(MetBrew_Signac)) + #color palette
  theme_classic() +
  theme(
    legend.position = "bottom", #legend below the plot
    legend.key.size = unit(0.05, "cm"), #tighter spacing between legend items
    legend.text = element_text(size = 10), #legend font size
    axis.text.x = element_text(angle = 45, vjust = 0.75, hjust = 0.75) #tilt season labels so they don't overlap
  ) +
  labs(
    x = "Season", y = "Points",
    title = "Points per team by season",
    subtitle = "English Football Women's Super League (tier 1)",
    caption = "Win = 3 points, Draw = 1 point, Loss = 0 points. Source: EWF Database (2025), https://github.com/probjects/ewf-database"
  )
pointsplot
ggplotly(
  pointsplot,
  tooltip = c("team_text", "points_text"), #hover shows team and points only
  width = 1000, #adjust width
  height = 800 #adjust height
)
# ggiraph version of the stacked points plot: same layout as the static plot,
# with bars that respond to hovering.
points_ggiraph <- df_standings %>%
  filter(tier == 1) %>% #keep top-tier rows only
  select(team_name, points, season) %>% #keep only the columns the plot uses
  ggplot(aes(
    x = as.factor(season),
    y = as.numeric(points),
    fill = as.factor(team_name), #one stacked segment per team
    tooltip = paste("Team:", team_name, "<br>Season total points:", points), #hover text
    data_id = as.factor(team_name) #same id for a team in every season
  )) +
  geom_col_interactive(position = "stack") + #interactive bars, heights taken from the data
  scale_fill_manual(name = "Team name", values = c(MetBrew_Signac)) + #fill palette
  scale_color_manual(name = "Team name", values = c(MetBrew_Signac)) + #color palette
  theme_classic() + #set theme
  theme(
    legend.position = "bottom", #legend below the plot
    legend.key.size = unit(0.05, "cm"), #tighter spacing between legend items
    legend.text = element_text(size = 7), #legend font size
    axis.text.x = element_text(angle = 45, vjust = 0.75, hjust = 0.75) #tilt season labels so they don't overlap
  ) +
  labs(
    x = "Season", y = "Points",
    title = "Points per team by season",
    subtitle = "English Football Women's Super League (tier 1)",
    caption = "Win = 3 points, Draw = 1 point, Loss = 0 points. Source: EWF Database (2025), https://github.com/probjects/ewf-database"
  )
pointsggiraph_plot <- girafe(
  ggobj = points_ggiraph,
  options = list(
    opts_hover(css = "fill:magenta;stroke:pink;cursor:pointer;"), #hover fill and outline
    opts_tooltip = opts_tooltip(css = "padding:2px;background-color:white;color:black;"), #tooltip padding and colors
    opts_sizing(rescale = TRUE, width = 1) #resize with the container
  )
)
pointsggiraph_plot
# Static bump (rank) plot: final league position per team per season (tier 1),
# followed by its plotly conversion.
# NOTE(review): the caption says 2020-2021 is missing, but no season filter is
# applied in this block -- confirm against the data.
rankplot <- df_standings %>%
  filter(tier == 1) %>% #keep top-tier rows only
  select(team_name, position, season) %>% #keep only the columns the plot uses
  ggplot(aes(
    x = as.factor(season),
    y = as.numeric(position),
    group = as.factor(team_name),
    color = as.factor(team_name),
    fill = as.factor(team_name),
    team_text = team_name, rank_text = position #extra aesthetics read by the ggplotly tooltip
  )) +
  geom_bump(size = 2) + #smoothed lines linking a team's positions across seasons
  geom_point(size = 4) + #marker at each season's final position
  scale_y_reverse(breaks = 1:12) + #reverse the axis so position 1 sits at the top
  scale_fill_manual(name = "Team name", values = c(MetBrew_Signac)) + #fill palette
  scale_color_manual(name = "Team name", values = c(MetBrew_Signac)) + #color palette
  theme_classic() + #set plot theme
  theme(
    legend.position = "bottom", #legend below the plot
    legend.key.size = unit(0.05, "cm"), #tighter spacing between legend items
    legend.text = element_text(size = 10), #legend font size
    axis.text.x = element_text(angle = 45, vjust = 0.75, hjust = 0.75) #tilt season labels so they don't overlap
  ) +
  labs(
    x = "Season", y = "Final position",
    title = "Final position per team by season",
    subtitle = "English Football Women's Super League (tier 1)",
    caption = "Missing 2020-2021. Source: EWF Database (2025), https://github.com/probjects/ewf-database"
  )
rankplot
plotly::ggplotly(
  rankplot,
  tooltip = c("team_text", "rank_text"), #customize hover labels
  width = 1000, #adjust width
  height = 800 #adjust height
)